/*    Copyright 2010 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.tests;

import java.util.ArrayList;
import java.util.List;

import junit.framework.TestCase;
import mosdi.fa.Alphabet;
import mosdi.fa.CDFA;
import mosdi.fa.DFAFactory;
import mosdi.fa.FiniteMemoryTextModel;
import mosdi.fa.GeneralizedString;
import mosdi.fa.IIDTextModel;
import mosdi.fa.MarkovianTextModel;
import mosdi.paa.ClumpSizeCalculator;
import mosdi.util.Iupac;
import mosdi.util.IupacStringConstraints;
import mosdi.util.iterators.IupacPatternGenerator;

public class ClumpSizeCalculatorTest extends TestCase {

	static final String background = "CGGATCTGATTTTGCTGCGCCCAATGAAGGAAGGCGGCGGGTATCCCACGCTTTGGATCCGATACAATGGGTTAAACAGATGAATGAGAGTTTAGAATACGTTGCTCCAAAGTTCGTTCTAACCCAACAGACAGGCGCTCCGCGAACGTCTGTAAAAGAGGCATCTGCCACGCAATCGTGGAGTGCAGGCTGGTGTTGGGACCATAGTCAATCAATCTTCAGCAAGTTACTACAGGTACTCGAACGTCTG";

	private static void assertEquals(double[] expected, double[] actual, double accuracy) {
		assertEquals(expected.length, actual.length);
		for (int i=0; i<expected.length; ++i) {
			if (actual[i]==0.0) {
				assertTrue(expected[i]==0.0);
			} else {
				assertTrue(Math.abs((expected[i]/actual[i])-1.0)<accuracy);
			}
		}
	}

	public void testClumpSizeStatistics() {
		Alphabet dnaAlphabet = Alphabet.getDnaAlphabet();
		String pattern = "ANNCNNA";
		List<GeneralizedString> l = new ArrayList<GeneralizedString>();
		l.add(Iupac.toGeneralizedString(pattern));
		CDFA cdfa = DFAFactory.build(dnaAlphabet, l);
		ClumpSizeCalculator csc = new ClumpSizeCalculator(new IIDTextModel(dnaAlphabet, background), cdfa, pattern.length());
		double[] clumpSizeDistribution = csc.clumpSizeDistribution(20, 1e-50);
		// we compare to exact results that were confirmed by simulation (1000 texts of length 1,000,000).
		// simulation results:
		// empiric counts:           [  0,           12834528,             1523481,               211258,                30301,                  4390,                   661,                   93,                    18,                     2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
		// empiric dist :            [0.0, 0.8787924352189413, 0.10431420446468993, 0.014465037769950178, 0.002074738516256238, 3.0058750821309146E-4, 4.5259303628440425E-5, 6.367799148933373E-6, 1.2324772546322658E-6, 1.3694191718136286E-7, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0]
		double[] expectedResultIID = {0.0, 0.8786827978268831, 0.10439958313925755, 0.014479415876725759, 0.002084225135296174, 3.0243972563833934E-4, 4.402905874706307E-5,  6.414826286207407E-6, 9.3489764022036E-7,    1.3626332218163257E-7, 1.986130272706012E-8, 2.894940422839106E-9, 4.2196171985172645E-10, 6.150448595027873E-11, 8.964801206213456E-12, 1.3066960406316699E-12, 1.904620694841389E-13, 2.7761467887589754E-14, 4.046470274384744E-15, 5.898074897943194E-16, 8.596946265556387E-17};
		assertEquals(expectedResultIID, clumpSizeDistribution, 1e-6);

		csc = new ClumpSizeCalculator(new MarkovianTextModel(1,dnaAlphabet, background), cdfa, pattern.length());
		clumpSizeDistribution = csc.clumpSizeDistribution(5, 1e-50);
		// we compare to exact results that were confirmed by simulation (1000 texts of length 1,000,000).
		// simulation results:
		// empiric counts:          [0, 12603022, 1556161, 226377, 34231, 5163]
		// empiric dist :           [0.0, 0.8736399786051121, 0.10787289451261053, 0.015692426581234997, 0.0023728888283803353, 3.5789854286838457E-4]
		double[] expectedResultM1 = {0.0, 0.8736849264939114, 0.10788851447724385, 0.015650704257016607, 0.0023547874971793933, 3.5704343522402016E-4};
		assertEquals(expectedResultM1, clumpSizeDistribution, 1e-6);
	}

	public void testExpectation() { 
		Alphabet dnaAlphabet = Alphabet.getDnaAlphabet();
		Alphabet iupacAlphabet = Alphabet.getIupacAlphabet();
		FiniteMemoryTextModel textModel = new MarkovianTextModel(1, dnaAlphabet, background);

		int[] minFrequencies = {0,0,0,0};
		int[] maxFrequencies = {4,2,0,1};
		IupacPatternGenerator generator = new IupacPatternGenerator(4, new IupacStringConstraints(minFrequencies,maxFrequencies));
		for (int[] pattern : generator) {
			if (!iupacAlphabet.buildString(pattern).contains("A")) continue;
			// System.out.println(iupacAlphabet.buildString(pattern));
			List<GeneralizedString> l = new ArrayList<GeneralizedString>();
			l.add(Iupac.toGeneralizedString(iupacAlphabet.buildString(pattern)));
			//l.add(Iupac.iupacToGeneralizedString(alphabet, Iupac.reverseComplementary(pattern)));
			CDFA cdfa = DFAFactory.build(dnaAlphabet, l);
			ClumpSizeCalculator csc = new ClumpSizeCalculator(textModel, cdfa, pattern.length);
			double[] clumpSizeDistribution = csc.clumpSizeDistribution(30, 1e-150);
			double expectation = 0.0;
			for (int i=1; i<clumpSizeDistribution.length; ++i) {
				expectation+=clumpSizeDistribution[i]*i;
			}
			assertEquals(expectation, ClumpSizeCalculator.expectedClumpSize(textModel, pattern, false), 1e-6);
		}
	}

	public void testExpectationReverse() { 
		Alphabet dnaAlphabet = Alphabet.getDnaAlphabet();
		Alphabet iupacAlphabet = Alphabet.getIupacAlphabet();
		FiniteMemoryTextModel textModel = new MarkovianTextModel(1, dnaAlphabet, background);

		int[] minFrequencies = {0,0,0,0};
		int[] maxFrequencies = {4,2,0,1};
		IupacPatternGenerator generator = new IupacPatternGenerator(4, new IupacStringConstraints(minFrequencies,maxFrequencies));
		for (int[] pattern : generator) {
			String s = iupacAlphabet.buildString(pattern);
			if (s.compareTo(Iupac.reverseComplementary(s))>0) continue;
			if (!s.contains("A")) continue;
			// System.out.println(s);
			List<GeneralizedString> l = new ArrayList<GeneralizedString>();
			l.add(Iupac.toGeneralizedString(iupacAlphabet.buildString(pattern)));
			l.add(Iupac.toGeneralizedString(Iupac.reverseComplementary(s)));
			CDFA cdfa = DFAFactory.build(dnaAlphabet, l);
			ClumpSizeCalculator csc = new ClumpSizeCalculator(textModel, cdfa, pattern.length);
			double[] clumpSizeDistribution = csc.clumpSizeDistribution(50, 1e-150);
			double expectation = 0.0;
			for (int i=1; i<clumpSizeDistribution.length; ++i) {
				expectation+=clumpSizeDistribution[i]*i;
			}
			assertEquals(expectation, ClumpSizeCalculator.expectedClumpSize(textModel, pattern, true), 1e-6);
		}
	}

}
